import scrapy
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule

from chap3.lyw.lyw.items import LywItem


class LieyunSpider(CrawlSpider):
    """Crawl the lieyunpro.com "latest" listing and scrape linked articles.

    The crawl starts at the first listing page. Links that look like
    further listing pages are followed and reported via ``parse_index``;
    links that look like article pages are passed to ``parse_item``,
    which yields one ``LywItem`` per response.
    """

    name = "lieyun"
    allowed_domains = ["lieyunpro.com"]
    start_urls = ["https://lieyunpro.com/latest/p1.html"]

    rules = (
        # Listing (pagination) pages: keep following links found on them.
        # The dot is escaped so that only a literal ".html" suffix matches.
        Rule(
            LinkExtractor(allow=r"/latest/p\d+\.html"),
            callback="parse_index",
            follow=True,
        ),
        # Article pages: scrape them, but do not extract further links.
        Rule(
            LinkExtractor(allow=r"/archives/\d+"),
            callback="parse_item",
            follow=False,
        ),
    )

    def parse_item(self, response):
        """Build a ``LywItem`` from an article page.

        Args:
            response: The downloaded article page.

        Yields:
            A ``LywItem`` with ``title``, ``time``, ``url`` and ``content``
            set. ``title`` and ``time`` are ``None`` when their XPath
            matches nothing; ``content`` is then an empty string.
        """
        # The title is read from the alt text of the article image.
        # NOTE(review): the class value 'img-fuil img-round' is kept exactly
        # as written; confirm it matches the site's markup.
        title = response.xpath("//img[@class='img-fuil img-round']/@alt").get()
        published = response.xpath("//span[@class='time']/text()").get()

        # Only text nodes directly inside the immediate child elements of the
        # main text container are collected, concatenated without separator.
        text_nodes = response.xpath("//div[@id='main-text-id']/*/text()").extract()
        content = "".join(text_nodes)

        item = LywItem()
        item["title"] = title
        item["time"] = published
        item["url"] = response.url
        item["content"] = content
        yield item

    def parse_index(self, response):
        """Log the URL of a visited listing page; produces no items.

        Args:
            response: The downloaded listing page.
        """
        # Use the spider's logger instead of print so the message respects
        # the configured log level and destination.
        self.logger.info("Visited index page: %s", response.url)